#!/usr/bin/env python
"""Create species.inc file from MFiX project definition"""

import codecs
import os
import re
import sys
import time

# Exactly one command-line argument is required: the project file to read.
if len(sys.argv[1:]) != 1:
    print("Usage: %s file.mfx" % sys.argv[0])
    sys.exit(1)

# The output always goes to a fixed file name; the input comes from argv.
output_filename = "species.inc"
input_filename = sys.argv[1]

# Note, these regexes will match commented-out keys,
#  so we strip comments out of input before matching

# Regex to match species_alias_s(m,n) = "name"
# Whitespace (spaces/tabs) is tolerated around both indices, including
# between the first index and the comma.  The key is matched
# case-insensitively.  There are two capturing groups, so findall() yields
# (n, name) string tuples; the first index m is matched but not captured.
species_alias_s_re = re.compile(r"""
    species_alias_s               # key
    [ \t]*\(                      # opening paren
    [ \t]*\d+[ \t]*,[ \t]*(\d+)   # two indices, capturing only the second
    [ \t]*\)                      # closing paren
    [ \t]*=                       # equal sign, possible whitespace
    [ \t]*['"]                    # either flavor of quote
    ([A-Za-z]+[A-Za-z0-9_]*)      # species alias, alphanumeric or underscore, cannot start with digit
    ['"]                          # closing quote
""", re.VERBOSE|re.IGNORECASE)

# Regex to match species_alias_g(n) = "name"
# Spaces/tabs are allowed around the index, the parentheses and the equal
# sign; the key is matched case-insensitively (re.IGNORECASE).  There are two
# capturing groups, so findall() yields (n, name) string tuples.  The opening
# and closing quote characters are not required to be the same flavor.
species_alias_g_re = re.compile(r"""
    species_alias_g               # key
    [ \t]*\(                      # opening paren
    [ \t]*(\d+)                   # single index, capturing
    [ \t]*\)                      # closing paren
    [ \t]*=                       # equal sign, possible whitespace
    [ \t]*['"]                    # either flavor of quote
    ([A-Za-z]+[A-Za-z0-9_]*)      # species alias, alphanumeric or underscore, cannot start with digit
    ['"]                          # closing quote
""", re.VERBOSE|re.IGNORECASE)

# Regex to match entire @(RXNS) block (multiline)
# Group 1 captures everything between the @(RXNS) marker and the first
# following @(END).  re.DOTALL lets '.' cross line boundaries so the block may
# span many lines.  The markers are matched case-sensitively (no
# re.IGNORECASE here).  re.MULTILINE has no effect on this pattern because it
# contains no ^ or $ anchors.
rxns_re = re.compile(r"""
    @\(RXNS\)                    # start of reactions block
    (.*?)                        # non-greedy match, in case of multiple @(END)
    @\(END\)                     # end of block
""", re.VERBOSE|re.DOTALL|re.MULTILINE)

# Regex to match entire @(DES_RXNS) block (multiline)
# Group 1 captures the block body; group 2 captures which terminator keyword
# (END or DES_END) closed it.  The body stops at the first terminator of
# either kind because the match is non-greedy.  re.DOTALL lets '.' cross line
# boundaries.  The markers are matched case-sensitively.  re.MULTILINE has no
# effect on this pattern because it contains no ^ or $ anchors.
des_rxns_re = re.compile(r"""
    @\(DES_RXNS\)                # start of DES reactions block
    (.*?)                        # non-greedy match
    @\((END|DES_END)\)           # can @(END) close @(DES_RXNS) block?
""", re.VERBOSE|re.DOTALL|re.MULTILINE)

# Regex to match a single reaction
# A reaction is an identifier at the start of a line (optionally indented),
# followed by a brace-delimited body that may span several lines.  The single
# capturing group is the reaction id, so findall() yields a list of ids.
# The id must start with an ASCII letter: the leading class is [a-zA-Z]
# (an "A-z" range would also admit the punctuation [ \ ] ^ _ `).
# Braces are escaped so they are always taken literally.
rxn_re = re.compile(r"""
    ^                          # start of line
    [ \t]*                     # possible whitespace
    ([a-zA-Z]+[a-zA-Z0-9_]*)   # reaction id, cannot start with digit or underscore
    \s*                        # whitespace (possibly multi-line)
    \{                         # opening brace
    .*?                        # non-greedy match for text in braces
    \}                         # closing brace
""", re.VERBOSE|re.MULTILINE|re.DOTALL)

# Tell the user which project file is being processed (base name only).
print("Processing chemical reaction data from %s... "
      % os.path.basename(input_filename))

def strip_comment(line):
    """Return *line* without surrounding whitespace or a trailing comment.

    The line is stripped first, then cut at the first ``!`` or ``#``,
    whichever comes earlier.  Whitespace that sits between the remaining
    text and the comment marker is kept.  The markers are not quote-aware:
    a ``!`` or ``#`` inside a quoted string also starts a comment.
    """
    text = line.strip()
    # Cutting at '!' and then at '#' leaves the prefix before either marker.
    before_bang = text.partition('!')[0]
    return before_bang.partition('#')[0]

# Read the whole project file as UTF-8, removing comments line by line so that
# commented-out keys cannot be picked up by the regexes.  The file is opened
# in a `with` block so the handle is closed as soon as reading is finished.
with codecs.open(input_filename, "r", 'utf-8') as infile:
    data = '\n'.join(strip_comment(line) for line in infile)
output = [] # list of 2-tuples (val, name) transformed into output file

# Species aliases: each findall() returns (index, alias) string pairs, which
# already have the (val, name) shape used by `output`.  Gas aliases come first.
output.extend(species_alias_g_re.findall(data))
output.extend(species_alias_s_re.findall(data))

# Reactions: only the first block of each kind is searched for.  Reaction ids
# found inside it are numbered from 1, independently for each kind of block.
for block_re in (rxns_re, des_rxns_re):
    block = block_re.search(data)
    if block is None:
        continue
    reaction_ids = rxn_re.findall(block.group(1))
    output.extend(enumerate(reaction_ids, 1))

# Every collected name must be unique: abort if any name occurs twice.
names = [name for (_, name) in output]

# Tally occurrences in one pass rather than calling list.count() per name,
# which is quadratic in the number of names.
occurrences = {}
for name in names:
    occurrences[name] = occurrences.get(name, 0) + 1

# Report the first name, in collection order, that occurs more than once.
for name in names:
    if occurrences[name] > 1:
        print("ERROR, duplicate entry for", name)
        sys.exit(1)

# Write the include file as UTF-8: three '!' comment header lines followed by
# one INTEGER, PARAMETER declaration per collected (val, name) pair.
with codecs.open(output_filename, 'w', 'utf-8') as f:
    lines = [
        "!This file was generated by %s\n" % sys.argv[0],
        "!from %s on %s\n" % (input_filename, time.ctime()),
        "!Edit at your own risk!!!\n",
    ]
    for (val, name) in output:
        lines.append("      INTEGER, PARAMETER :: %s = %s\n" % (name, val))
    f.write("".join(lines))

print("done")
